import scrapy
from scrapy.selector import Selector
from spider1.items import MovieItem
from urllib.parse import urlparse, parse_qs  # 导入必要的函数
from scrapy_splash import SplashRequest
class DoubanSpider(scrapy.Spider):
    """Spider that scrapes rows of the ranking table on shanghairanking.cn.

    The page listed in ``start_urls`` is fetched and every ``<tr>`` in the
    ranking table body is converted into one ``MovieItem``.

    NOTE(review): the class name and the item type refer to "Douban"/"Movie"
    while the target is a university ranking; both names are kept because
    other modules may reference them.
    NOTE(review): this spider parses the plain HTTP response. If the table is
    only rendered client-side, the rows selector will match nothing and no
    items are yielded -- confirm against the live page.
    """

    name = "college"
    allowed_domains = ["www.shanghairanking.cn"]
    start_urls = ["https://www.shanghairanking.cn/rankings/bcur/202311"]

    # CSS selector for the table rows, relative to the whole document.
    _ROWS_SELECTOR = '#content-box > div.rk-table-box > table > tbody > tr'

    # Item field -> CSS selector, each evaluated relative to a single row.
    _FIELD_SELECTORS = {
        'university': 'a.name-cn::text',
        'position': 'tr> td:nth-child(3)::text',
        'kind': 'tr > td:nth-child(4)::text',
        'score': 'tr > td:nth-child(5)::text',
        'layer': 'tr > td:nth-child(6)::text',
    }

    @staticmethod
    def _clean_text(value):
        """Return ``value`` with surrounding whitespace removed, or ``None``.

        ``extract_first()`` returns ``None`` when a selector matches nothing;
        passing that through avoids an ``AttributeError`` on ``None.strip()``.
        """
        if value is None:
            return None
        return value.strip()

    def parse(self, response):
        """Yield one ``MovieItem`` per row of the ranking table.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            MovieItem: An item with the keys ``university``, ``position``,
            ``kind``, ``score`` and ``layer``. Each value is the stripped
            first text node matched by the field's selector, or ``None``
            when the selector matches nothing in that row.
        """
        for row in response.css(self._ROWS_SELECTOR):
            item = MovieItem()
            for field, selector in self._FIELD_SELECTORS.items():
                # A missing cell must not abort the remaining rows, so the
                # cleaner tolerates the ``None`` returned for "no match".
                item[field] = self._clean_text(row.css(selector).extract_first())
            yield item

